Bulk RNASeq data from Mayo Clinic.

# gaussian to run the linear regression 
pheno_list = c("Braak" = "gaussian",
               "thal" = "gaussian"
#               "diagnosis" = "binomial" # AD x CT 
)
work_dir = "/pastel/Github_scripts/SpeakEasy_dlpfc/bulk_dlpfc/replication/"
resources = "/pastel/resources/MayoClinic/RNAseq/"
net_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_net_MF/"

library(tidyverse)
library(janitor)
library(WGCNA)

We already have the modules from our ROSMAP dataset, so I’ll use this as gene list and calculate the eigengenes and module average expression for the same genes for Mayo. Then, we’ll run regressions with their AD-related covariates to check for associations.

Input external dataset

metadata = read.csv(paste0(resources, "RNAseq_Harmonization_Mayo_combined_metadata.csv"))
metadata$ageDeath = as.numeric(gsub("\\+", "", metadata$ageDeath))

message(paste0("Unique individualID: ", length(unique(metadata$individualID)))) 
## Unique individualID: 355
message(paste0("Unique specimenID: ", length(unique(metadata$specimenID)))) 
## Unique specimenID: 597
table(metadata$tissue)
## 
##      cerebellum temporal cortex 
##             278             319

Select samples

To avoid repeated measures.

# metadata 
metadata_temp = metadata[metadata$exclude == "FALSE", ] # Remove the samples that didn't pass QC 
# metadata_temp = metadata_temp[metadata_temp$diagnosis %in% c("Alzheimer Disease", "control"), ] # We filter to get only AD and CT
metadata_sel = metadata_temp[metadata_temp$tissue == "temporal cortex", ] # select the temporal cortex
# expression 
expr_mtx = read.table(paste0(resources, "Mayo_Normalized_counts_CQN.tsv"), check.names = F, header = T)
rownames(expr_mtx) = expr_mtx$feature
expr_mtx_sel = expr_mtx[, colnames(expr_mtx) %in% metadata_sel$specimenID] # select the same samples from the metadata 

Input ROSMAP networks

Bulk DLPFC.

modules_file = read.table(paste0(net_dir, "geneBycluster.txt"), header = T)

modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")
too_small = as.character( modules_size$module[modules_size$n_nodes < 30]) # Get modules < 30 nodes to be removed
net_output = modules_file %>% filter(! cluster_lv3 %in% as.integer(too_small)) # the clusters with at least 30 nodes 
net_output = net_output[,c("ensembl", "cluster_lv3", "gene_type", "symbol")]
net_output$ensembl2 = gsub("(.*)\\.(.*)", "\\1",net_output$ensembl)

message(paste0("Number of modules: ", length(unique(net_output$cluster_lv3))))
## Number of modules: 34

Average expression - Mayo

I got our gene list and calculated the ME for Mayo Clinic expression.

gene_mod_map = data.frame(ensembl = rownames(expr_mtx_sel))
gene_mod_map = gene_mod_map %>% inner_join(net_output[,-1], by = c("ensembl" = "ensembl2"))

expr_mtx_sel_ord = expr_mtx_sel[gene_mod_map$ensembl,] # order the matrix
identical(rownames(expr_mtx_sel_ord), gene_mod_map$ensembl) # must be TRUE 
## [1] TRUE
colors_mod = gene_mod_map$cluster_lv3
expr_mtx_sel_ord_t = as.data.frame(t(expr_mtx_sel_ord))
external_ME = moduleEigengenes(expr_mtx_sel_ord_t, colors = colors_mod)
mod_average = external_ME$averageExpr # data.frame

# save results
# save(external_ME, metadata_sel, file = paste0(work_dir, "Mayo_moduleEigengenes_lv3.Rdata"))
# write.table(external_ME$eigengenes, file = paste0(work_dir, "Mayo_moduleEigengenes_lv3.txt"), sep = "\t", quote = F, row.names = T)
# write.table(external_ME$averageExpr, file = paste0(work_dir, "Mayo_moduleaverageExpr_lv3.txt"), sep = "\t", quote = F, row.names = T)

createDT(mod_average)

Regressions

Input: Average expression by module.

Numbers and colors : -log10(nominal pvalue)

cutpoints:

< 0.001 = ***

0.01 = **

0.05 = *

0.1 = .

1 = ” ”

data4linear_reg <- mod_average # match codes name 
phenotype_dt = metadata_sel[match(rownames(data4linear_reg), metadata_sel$specimenID), ]
all(rownames(data4linear_reg) == phenotype_dt$specimenID) # Must be TRUE. Match IDs
## [1] TRUE
# hist(phenotype_dt$Braak)

message(paste0("Unique individualID: ", length(unique(phenotype_dt$individualID)))) 
## Unique individualID: 258
message(paste0("Unique specimenID: ", length(unique(phenotype_dt$specimenID)))) 
## Unique specimenID: 258
res_test = run_module_trait_association(data4linear_reg, phenotype_dt, pheno_list, covariates = c("ageDeath","sex")) # covariates to adjust
matrix_rsquared = res_test$matrix_rsquared
matrix_pvalue = res_test$matrix_pvalue

### Get modules >= 30 nodes to show:
to_show = paste0("AE",as.character( modules_size$module[modules_size$n_nodes >= 30])) 

# pdf("/pastel/Github_scripts/SpeakEasy_dlpfc/figures4paper/reg_Mayo_adjcov.pdf", width = 2.5, height = 11)
plot_module_trait_association_heatmap(res_test, to_show)

# dev.off()

Top results

Top result by covariate.

# res_test$all_stats_df 
createDT(res_test$all_stats_df %>% group_by(phenotype) %>% slice_head(n = 1))

Nominal pvalue

createDT(matrix_pvalue)

Significant results

Adj P < 0.05

Threshold: At least one module with adjusted pvalue < 0.05.

Method: Bonferroni by column.

plot_module_trait_association_heatmap(res_test, to_show, show_only_significant = T, signif_cutoff = 0.05)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)

Adj P < 0.000001

plot_module_trait_association_heatmap(res_test, to_show, show_only_significant = T, signif_cutoff = 0.000001)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)

res_test$all_stats_df$adj_pvalue = p.adjust(res_test$all_stats_df$nom_p, 
                                            method = "fdr")

createDT(res_test$all_stats_df)

Session info

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] RColorBrewer_1.1-3    circlize_0.4.16       ComplexHeatmap_2.15.4
##  [4] performance_0.12.2    lmerTest_3.1-3        lme4_1.1-35.1        
##  [7] Matrix_1.6-5          WGCNA_1.72-5          fastcluster_1.2.6    
## [10] dynamicTreeCut_1.63-1 janitor_2.2.0         lubridate_1.9.3      
## [13] forcats_1.0.0         stringr_1.5.1         dplyr_1.1.4          
## [16] purrr_1.0.2           readr_2.1.5           tidyr_1.3.1          
## [19] tibble_3.2.1          ggplot2_3.5.1         tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] minqa_1.2.7            colorspace_2.1-1       rjson_0.2.21          
##   [4] snakecase_0.11.1       htmlTable_2.4.3        XVector_0.34.0        
##   [7] GlobalOptions_0.1.2    base64enc_0.1-3        clue_0.3-65           
##  [10] rstudioapi_0.16.0      DT_0.33                bit64_4.0.5           
##  [13] AnnotationDbi_1.56.2   fansi_1.0.6            codetools_0.2-18      
##  [16] splines_4.1.2          doParallel_1.0.17      cachem_1.1.0          
##  [19] impute_1.68.0          knitr_1.48             Formula_1.2-5         
##  [22] jsonlite_1.8.8         nloptr_2.1.1           Cairo_1.6-2           
##  [25] cluster_2.1.2          GO.db_3.14.0           png_0.1-8             
##  [28] compiler_4.1.2         httr_1.4.7             backports_1.5.0       
##  [31] fastmap_1.2.0          cli_3.6.3              htmltools_0.5.8.1     
##  [34] tools_4.1.2            gtable_0.3.5           glue_1.7.0            
##  [37] GenomeInfoDbData_1.2.7 reshape2_1.4.4         Rcpp_1.0.13           
##  [40] Biobase_2.54.0         jquerylib_0.1.4        vctrs_0.6.5           
##  [43] Biostrings_2.62.0      preprocessCore_1.61.0  nlme_3.1-153          
##  [46] iterators_1.0.14       crosstalk_1.2.1        insight_0.20.2        
##  [49] xfun_0.46              timechange_0.3.0       lifecycle_1.0.4       
##  [52] zlibbioc_1.40.0        MASS_7.3-60.0.1        scales_1.3.0          
##  [55] hms_1.1.3              parallel_4.1.2         yaml_2.3.10           
##  [58] memoise_2.0.1          gridExtra_2.3          sass_0.4.9            
##  [61] rpart_4.1-15           stringi_1.8.4          RSQLite_2.3.7         
##  [64] highr_0.11             S4Vectors_0.32.4       foreach_1.5.2         
##  [67] checkmate_2.3.2        BiocGenerics_0.40.0    boot_1.3-28           
##  [70] shape_1.4.6.1          GenomeInfoDb_1.30.1    rlang_1.1.4           
##  [73] pkgconfig_2.0.3        matrixStats_1.3.0      bitops_1.0-8          
##  [76] evaluate_0.24.0        lattice_0.20-45        htmlwidgets_1.6.4     
##  [79] bit_4.0.5              tidyselect_1.2.1       plyr_1.8.9            
##  [82] magrittr_2.0.3         R6_2.5.1               magick_2.8.4          
##  [85] IRanges_2.28.0         generics_0.1.3         Hmisc_5.1-3           
##  [88] DBI_1.2.3              pillar_1.9.0           foreign_0.8-81        
##  [91] withr_3.0.0            survival_3.7-0         KEGGREST_1.34.0       
##  [94] RCurl_1.98-1.16        nnet_7.3-16            crayon_1.5.3          
##  [97] utf8_1.2.4             tzdb_0.4.0             rmarkdown_2.27        
## [100] GetoptLong_1.0.5       data.table_1.15.4      blob_1.2.4            
## [103] digest_0.6.36          numDeriv_2016.8-1.1    stats4_4.1.2          
## [106] munsell_0.5.1          bslib_0.8.0